{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import pickle\n",
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.metrics import roc_curve, auc\n",
    "from timeit import default_timer as timer\n",
    "# needed to see images\n",
    "from IPython.display import display, Image\n",
    "import pickle\n",
    "from sklearn.preprocessing import LabelBinarizer\n",
    "\n",
    "# needed for plotting\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "import seaborn as sns\n",
    "current_palette = sns.color_palette()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# using TensorFlow V.12\n",
    "tf.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def getkey(item):\n",
    "    fpr, tpr, _ = roc_curve(test_labels.ravel(), test_preds[item])\n",
    "    roc_auc = auc(fpr, tpr)\n",
    "    return roc_auc\n",
    "\n",
    "def plotROC(testlabels, test_preds):\n",
    "    classifiers = list(test_preds.keys())\n",
    "\n",
    "    # Plot all ROC curves\n",
    "    plt.figure(figsize=(15,9))\n",
    "    for i, clf in zip(range(len(classifiers)), sorted(classifiers, key=getkey, reverse=True)):\n",
    "        fpr, tpr, _ = roc_curve(testlabels.ravel(), test_preds[clf] )\n",
    "        roc_auc = auc(fpr, tpr)\n",
    "        plt.plot(fpr, tpr,\n",
    "                 label='ROC curve '+ clf +  ' (area = {0:0.4f})'\n",
    "                       ''.format(roc_auc), linestyle='-', linewidth=2)\n",
    "\n",
    "\n",
    "    plt.plot([0, 1], [0, 1], 'k--', lw=2)\n",
    "    plt.xlim([-0.1, 1.0])\n",
    "    plt.ylim([0.0, 1.05])\n",
    "    plt.xlabel('False Positive Rate')\n",
    "    plt.ylabel('True Positive Rate')\n",
    "    plt.title('Comparison of multiclass ROC curves')\n",
    "    plt.legend(loc=\"lower right\", fontsize=14)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "data = pickle.load(open('const_shape.p', 'rb'))\n",
    "print(data.shape)\n",
    "print(data.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "data_cols = data.columns[:820]\n",
    "# removing last two samples, because 8730 is much easier to iterate in chunks than 8732.\n",
    "s = data[:8730]\n",
    "print('working dataframe\\'s shape:', s.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# test train split\n",
    "test_preds = {}\n",
    "train = s[0:6984]\n",
    "test = s[6984:]\n",
    "\n",
    "# one hot encode labels\n",
    "LB = LabelBinarizer().fit(train['label'])\n",
    "test_labels = LB.transform(test['label'])\n",
    "\n",
    "# print shapes\n",
    "print('train shape {}\\ntest  shape {}\\niterable chunks: 1, 2, 3, 6, 9, 18, 97, 194, 291, 582, 873, 1746'.format(train.shape, test.shape))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "del data, s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def accuracy(predictions, labels):\n",
    "    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))\n",
    "            / predictions.shape[0])\n",
    "\n",
    "def weight_variable(shape):\n",
    "    initial = tf.truncated_normal(shape, stddev=0.01)\n",
    "    return tf.Variable(initial)\n",
    "\n",
    "def bias_variable(shape):\n",
    "    initial = tf.constant(0.1, shape=shape)\n",
    "    return tf.Variable(initial)\n",
    "\n",
    "def test_accuracy(session, test_data=test, during = True):\n",
    "    \"\"\"compute test accuracy in chunks so that we don't chash our RAM\"\"\"\n",
    "    test_data.reset_index(inplace=True, drop=True)\n",
    "    # \n",
    "    epoch_pred = session.run(prediction, feed_dict={tf_data : np.array(test_data.loc[0:check_size-1,data_cols]).reshape(check_size,20,41), keep_prob : 1.0})\n",
    "    for i in range(check_size, test_data.shape[0], check_size):\n",
    "        epoch_pred = np.concatenate([epoch_pred, session.run(prediction, \n",
    "                                    feed_dict={tf_data : np.array(test_data.loc[i:i+check_size-1,data_cols]).reshape(check_size,20,41), keep_prob : 1.0})], axis=0)\n",
    "    if during:\n",
    "        return accuracy(epoch_pred, test_labels)\n",
    "    else:\n",
    "        return epoch_pred"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "## Run Session"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "num_labels = 10\n",
    "batch_size = 97\n",
    "acc_over_time = {}\n",
    "def Run_Session(num_epochs, name, k_prob=1.0, mute=False, record=False):\n",
    "    global train\n",
    "    \n",
    "    start = timer()\n",
    "    with tf.Session(graph=graph) as session:\n",
    "        if record:\n",
    "            merged = tf.merge_all_summaries()  \n",
    "            writer = tf.train.SummaryWriter(\"/tmp/tensorflowlogs\", session.graph)\n",
    "        \n",
    "        #tf.initialize_all_variables().run() # commented because of v.12\n",
    "        tf.global_variables_initializer().run()\n",
    "\n",
    "        print(\"Initialized\")\n",
    "        accu = []\n",
    "        \n",
    "        for epoch in range(num_epochs):\n",
    "            \n",
    "            # get batch\n",
    "            train_batch = train.sample(batch_size)\n",
    "            \n",
    "            t_d = np.asarray(train_batch[data_cols]).reshape(batch_size,20,41)\n",
    "            t_l = LB.transform(train_batch['label'])\n",
    "            \n",
    "            # make feed dict\n",
    "            feed_dict = { tf_data : t_d, train_labels : t_l, keep_prob : k_prob}\n",
    "            \n",
    "            # run model on batch\n",
    "            _, l, predictions = session.run([optimizer, loss, prediction], feed_dict=feed_dict)\n",
    "            \n",
    "            # mid model accuracy checks \n",
    "            if (epoch % 1000 == 0) and not mute:\n",
    "                print(\"\\tMinibatch loss at epoch {}: {}\".format(epoch, l))\n",
    "                print(\"\\tMinibatch accuracy: {:.1f}\".format(accuracy(predictions, t_l)))\n",
    "            if (epoch % 5000 == 0) and not mute:\n",
    "                print(\"Test accuracy: {:.1f}\".format(test_accuracy(session, during=True)))\n",
    "            if (epoch % 1000 == 0) and not mute:\n",
    "                accu.append(tuple([epoch, test_accuracy(session, during=True)]))\n",
    "                \n",
    "        # record accuracy and predictions\n",
    "        test_preds[name] = test_accuracy(session, during=False)\n",
    "        print(\"Final Test accuracy: {:.1f}\".format(accuracy(test_preds[name], test_labels)))\n",
    "        end = timer()\n",
    "        test_preds[name] = test_preds[name].ravel()\n",
    "        acc_over_time[name] = accu\n",
    "        print(\"time taken: {0} minutes {1:.1f} seconds\".format((end - start)//60, (end - start)%60))\n",
    "        #tf.train.export_meta_graph()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "## RNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# constants\n",
    "num_labels = 10\n",
    "batch_size = 97\n",
    "check_size = 97\n",
    "rows, cols = 20, 41\n",
    "n_hidden = 600\n",
    "beta = 0.04\n",
    "\n",
    "graph = tf.Graph()\n",
    "with graph.as_default():\n",
    "    # placeholders\n",
    "    tf_data = tf.placeholder(tf.float32, shape=[None, rows, cols])\n",
    "    train_labels = tf.placeholder(tf.float32, shape=[None, num_labels])\n",
    "    keep_prob = tf.placeholder(tf.float32)\n",
    "    \n",
    "    # weights and biases\n",
    "    layer1_weights = weight_variable([rows*n_hidden, num_labels])\n",
    "    layer1_biases = bias_variable([num_labels])\n",
    "    \n",
    "    # model\n",
    "    def model(data, proba=1.0):\n",
    "        # make RNN Cell\n",
    "        cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, state_is_tuple=True)\n",
    "        \n",
    "        # use that RNN cell\n",
    "        layer1, _istate = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)\n",
    "        \n",
    "        layer1 = tf.reshape(layer1, shape=[batch_size, rows*n_hidden])\n",
    "        \n",
    "        layer1 = tf.nn.dropout(layer1, proba)\n",
    "        return tf.matmul(layer1, layer1_weights) + layer1_biases\n",
    "\n",
    "    # Training computation.\n",
    "    logits = model(tf_data, keep_prob)\n",
    "    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, train_labels) +\n",
    "                         beta*tf.nn.l2_loss(layer1_weights) +\n",
    "                         beta*tf.nn.l2_loss(layer1_biases))\n",
    "\n",
    "    # Optimizer.\n",
    "    optimizer = tf.train.AdamOptimizer(1e-4).minimize(loss)\n",
    "\n",
    "    # Predictions for the training, validation, and test data.\n",
    "    # !!!! as this model does not allow calling model more than once we will need to get test predictions\n",
    "    #      by running the test data through the same prediction op as the train data. !!!\n",
    "    # !!! make note to not run optimizer on same function call !!!\n",
    "    prediction = tf.nn.softmax(logits) \n",
    "    print('Basic RNN model made')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "Run_Session(20000, 'RNN', .5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## DeepNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# constants\n",
    "num_labels = 10\n",
    "\n",
    "batch_size = 97\n",
    "check_size = 582\n",
    "rows, cols = 20, 41\n",
    "\n",
    "n_hidden1 = 1200\n",
    "n_hidden2 = 1500\n",
    "n_hidden3 = 1200\n",
    "\n",
    "beta = 0.01\n",
    "\n",
    "graph = tf.Graph()\n",
    "with graph.as_default():\n",
    "    # placeholders\n",
    "    tf_data = tf.placeholder(tf.float32, shape=[None, rows, cols])\n",
    "    train_labels = tf.placeholder(tf.float32, shape=[None, num_labels])\n",
    "    keep_prob = tf.placeholder(tf.float32)\n",
    "    \n",
    "    # weights and biases\n",
    "    layer1_weights = weight_variable([rows*cols, n_hidden1])\n",
    "    layer1_biases = bias_variable([n_hidden1])\n",
    "    layer2_weights = weight_variable([n_hidden1, n_hidden2])\n",
    "    layer2_biases = bias_variable([n_hidden2])\n",
    "    layer3_weights = weight_variable([n_hidden2, n_hidden3])\n",
    "    layer3_biases = bias_variable([n_hidden3])\n",
    "    layer4_weights = weight_variable([n_hidden3, num_labels])\n",
    "    layer4_biases = bias_variable([num_labels])\n",
    "\n",
    "    # model\n",
    "    def model(data, proba=1.0):\n",
    "        \n",
    "        layer1 = tf.nn.relu(tf.matmul(tf.reshape(data, [-1, rows*cols]), layer1_weights) + layer1_biases)\n",
    "        layer1 = tf.nn.dropout(layer1, proba)\n",
    "        \n",
    "        layer2 = tf.nn.relu(tf.matmul(layer1, layer2_weights) + layer2_biases)\n",
    "        layer2 = tf.nn.dropout(layer2, proba)\n",
    "        \n",
    "        layer3 = tf.nn.relu(tf.matmul(layer2, layer3_weights) + layer3_biases)\n",
    "        layer3 = tf.nn.dropout(layer3, proba)\n",
    "        \n",
    "        return tf.matmul(layer3, layer4_weights) + layer4_biases\n",
    "\n",
    "    # Training computation.\n",
    "    logits = model(tf_data, keep_prob)\n",
    "    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, train_labels) +\n",
    "                         beta*tf.nn.l2_loss(layer1_weights) +\n",
    "                         beta*tf.nn.l2_loss(layer1_biases) +\n",
    "                         beta*tf.nn.l2_loss(layer2_weights) +\n",
    "                         beta*tf.nn.l2_loss(layer2_biases) +\n",
    "                         beta*tf.nn.l2_loss(layer3_weights) +\n",
    "                         beta*tf.nn.l2_loss(layer3_biases) +\n",
    "                         beta*tf.nn.l2_loss(layer4_weights) +\n",
    "                         beta*tf.nn.l2_loss(layer4_biases))\n",
    "\n",
    "    # Optimizer.\n",
    "    optimizer = tf.train.AdamOptimizer(1e-4).minimize(loss)\n",
    "\n",
    "    # Predictions for the training, validation, and test data.\n",
    "    # !!!! as this RNN model does not allow calling model more than once we will need to get test predictions\n",
    "    #      by running the test data through the same prediction op as the train data. !!!\n",
    "    # !!! make note to not run optimizer on same function call !!!\n",
    "    prediction = tf.nn.softmax(logits)  \n",
    "    print('Basic DeepNN model made')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "Run_Session(20000, 'DeepNN', .5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## CNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# single layer CNN\n",
    "num_labels = 10\n",
    "\n",
    "batch_size = 97\n",
    "check_size = 582\n",
    "rows, cols = 20, 41\n",
    "patch_size = 5\n",
    "num_channels = 1\n",
    "depth1 = 32\n",
    "\n",
    "num_hidden = 2050\n",
    "\n",
    "beta = 0.01\n",
    "\n",
    "graph = tf.Graph()\n",
    "with graph.as_default():\n",
    "    \n",
    "    tf_data = tf.placeholder(tf.float32, shape=(None, rows, cols))\n",
    "    train_labels = tf.placeholder(tf.float32, shape=(None, num_labels))\n",
    "    #tf_test_dataset = tf.placeholder(tf.float32, shape=(check_size, image_size, image_size, num_channels))\n",
    "\n",
    "    # Variables.\n",
    "    layer1_weights = weight_variable([patch_size, patch_size, 1, depth1])\n",
    "    layer1_biases = bias_variable([depth1])\n",
    "    layer2_weights = weight_variable([rows//2 * (cols//2 + 1) * depth1, num_hidden])\n",
    "    layer2_biases = bias_variable([num_hidden])\n",
    "    layer3_weights = weight_variable([num_hidden, num_labels])\n",
    "    layer3_biases = bias_variable([num_labels])\n",
    "\n",
    "    keep_prob = tf.placeholder(tf.float32)\n",
    "    # Model with dropout\n",
    "    def model(data, proba=keep_prob):\n",
    "        # Convolution\n",
    "        conv1 = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1] , padding='SAME') + layer1_biases\n",
    "        pooled1 = tf.nn.max_pool(tf.nn.relu(conv1), ksize=[1, 2, 2, 1],strides=[1, 1, 1, 1], padding='SAME')\n",
    "\n",
    "        # Fully Connected Layer\n",
    "        shape = pooled1.get_shape().as_list()\n",
    "        reshape = tf.reshape(pooled1, [-1, shape[1] * shape[2] * shape[3]])\n",
    "        full2 = tf.nn.relu(tf.matmul(reshape, layer2_weights) + layer2_biases)\n",
    "\n",
    "        # Dropout\n",
    "        full2 = tf.nn.dropout(full2, proba)\n",
    "        \n",
    "        return tf.matmul(full2, layer3_weights) + layer3_biases\n",
    "\n",
    "    # Training computation.\n",
    "    logits = model(tf.expand_dims(tf_data, -1), keep_prob)\n",
    "    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, train_labels) +\n",
    "                         beta*tf.nn.l2_loss(layer1_weights) +\n",
    "                         beta*tf.nn.l2_loss(layer1_biases) +\n",
    "                         beta*tf.nn.l2_loss(layer2_weights) +\n",
    "                         beta*tf.nn.l2_loss(layer2_biases) +\n",
    "                         beta*tf.nn.l2_loss(layer3_weights) +\n",
    "                         beta*tf.nn.l2_loss(layer3_biases))\n",
    "\n",
    "    # Optimizer.\n",
    "    optimizer = tf.train.AdamOptimizer(1e-4).minimize(loss)\n",
    "\n",
    "    # Predictions for the training, validation, and test data.\n",
    "    prediction = tf.nn.softmax(logits)\n",
    "    #test_prediction = tf.nn.softmax(model(tf_test_dataset,1.0))  \n",
    "    print('Basic CNN model made')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "Run_Session(20000, 'CNN', .5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "plt.plot(np.array(acc_over_time['RNN'])[:,0], np.array(acc_over_time['RNN'])[:,1])\n",
    "plt.plot(np.array(acc_over_time['DeepNN'])[:,0], np.array(acc_over_time['DeepNN'])[:,1])\n",
    "plt.plot(np.array(acc_over_time['CNN'])[:,0], np.array(acc_over_time['CNN'])[:,1])\n",
    "plt.title('Accuracy')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# record measurements\n",
    "#dataset_acc = pickle.load(open('dataset_acc.p','rb'))\n",
    "dataset_acc['const_MFCC'] = {'DeepNN': np.array(acc_over_time['DeepNN'])[-1,1],\n",
    "                            'RNN': np.array(acc_over_time['RNN'])[-1,1],\n",
    "                          'CNN': np.array(acc_over_time['CNN'])[-1,1]}\n",
    "#pickle.dump(dataset_acc, open('dataset_acc.p','wb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "dataset_acc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "plotROC(test_labels, test_preds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [Root]",
   "language": "python",
   "name": "Python [Root]"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
